{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from cot import Collection"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Generate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[nltk_data] Downloading package punkt to /home/kon/nltk_data...\n",
      "[nltk_data]   Package punkt is already up-to-date!\n"
     ]
    }
   ],
   "source": [
    "# Build the collection with cot's `load_thoughtsource_33` loader; the summary shown by\n",
    "# the next cell lists 33 items for each loaded dataset.\n",
    "coll = Collection.load_thoughtsource_33()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "| Name           | Train   | Valid   | Test   |\n",
       "|----------------|---------|---------|--------|\n",
       "| commonsense_qa | -       | 33      | -      |\n",
       "| med_qa         | -       | -       | 33     |\n",
       "| medmc_qa       | -       | 33      | -      |\n",
       "| open_book_qa   | -       | -       | 33     |\n",
       "| strategy_qa    | 33      | -       | -      |\n",
       "| worldtree      | -       | -       | 33     |\n",
       "\n",
       "Not loaded: ['aqua', 'asdiv', 'entailment_bank', 'gsm8k', 'mawps', 'pubmed_qa', 'qed', 'svamp']"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the collection summary: item counts per dataset and split, plus the datasets not loaded.\n",
    "coll"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Settings for the generation run: prompt-fragment keys, API backend and sampling parameters.\n",
    "config = {\n",
    "    # Prompt fragments, selected by key.\n",
    "    \"instruction_keys\": [None],\n",
    "    \"cot_trigger_keys\": [\"zhou-01\"],\n",
    "    \"answer_extraction_keys\": \"auto-kojima\",\n",
    "    \"author\": \"thoughtsource\",\n",
    "    # API backend and model.\n",
    "    \"api_service\": \"cohere\",\n",
    "    \"api_time_interval\": 1,  # NOTE(review): presumably a pause between API calls, in seconds; confirm\n",
    "    \"engine\": \"command-xlarge-nightly\",\n",
    "    # Sampling parameters.\n",
    "    \"temperature\": 0,\n",
    "    \"max_tokens\": 512,\n",
    "    # Console output switches.\n",
    "    \"verbose\": False,\n",
    "    \"warn\": False,\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generating commonsense_qa...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9b39e45de62b416a9c4b8d5804836add",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/33 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generating med_qa...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b83771625a754e1cbda0d613724c0982",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/33 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generating medmc_qa...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3d2aec496e7b42c3ad207aab985ef68a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/33 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generating open_book_qa...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8239528e870d4cdfbebbffca86fa00e9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/33 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generating strategy_qa...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "58aa974dd82f4751b9bf90df4ad16885",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/33 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generating worldtree...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "87b43c7b8bd048e483a9a2bd5ef15d87",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/33 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Run generation for every loaded dataset with the settings in `config`.\n",
    "# NOTE(review): presumably sends requests to the configured API service, so credentials\n",
    "# are needed and calls may be billed; confirm before re-running.\n",
    "coll.generate(config=config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6514a6969b0c454fb9b34269f531d6cd",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/33 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c1a5ea642c714cd4a2c7d69e78f5317a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/33 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c3dc30124b47456c9123347df95295da",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/33 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "94143644332746c28ca219d2c4ce7463",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/33 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4dfd1e3e446940bdb98196e565fbeb41",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/33 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "32498a9f2f0c4c6c84482346b1dcef71",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/33 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "{'commonsense_qa': {'validation': {'accuracy': {'command-xlarge-nightly': {'None_None_kojima-A-E': 0.515152,\n",
       "     'None_kojima-01_kojima-A-E': 0.484848,\n",
       "     'None_zhou-01_kojima-A-E': 0.727273},\n",
       "    'flan-T5-xxl': {'None_None_kojima-A-E': 0.848485,\n",
       "     'None_kojima-01_kojima-A-E': 0.848485,\n",
       "     'None_zhou-01_kojima-A-E': 0.848485,\n",
       "     'qa-10_None_kojima-A-E': 0.848485,\n",
       "     'qa-12_None_kojima-A-E': 0.848485,\n",
       "     'qa-13_None_kojima-A-E': 0.848485,\n",
       "     'qa-16_None_kojima-A-E': 0.727273,\n",
       "     'qa-17_None_kojima-A-E': 0.848485,\n",
       "     'zhou-01-ins_None_kojima-A-E': 0.818182},\n",
       "    'gpt-3.5-turbo': {'None_None_kojima-A-E': 0.757576,\n",
       "     'None_kojima-01_kojima-A-E': 0.727273,\n",
       "     'None_kojima-03_kojima-A-E': 0.727273,\n",
       "     'None_kojima-09_kojima-A-E': 0.69697,\n",
       "     'None_zhou-01_kojima-A-E': 0.69697,\n",
       "     'qa-01_None_kojima-A-E': 0.636364,\n",
       "     'qa-05_None_kojima-A-E': 0.69697,\n",
       "     'qa-08_None_kojima-A-E': 0.666667,\n",
       "     'qa-09_None_kojima-A-E': 0.636364,\n",
       "     'qa-10_None_kojima-A-E': 0.69697,\n",
       "     'qa-12_None_kojima-A-E': 0.575758,\n",
       "     'qa-13_None_kojima-A-E': 0.636364,\n",
       "     'qa-16_None_kojima-A-E': 0.636364,\n",
       "     'qa-17_None_kojima-A-E': 0.606061,\n",
       "     'zhou-01-ins_None_kojima-A-E': 0.787879,\n",
       "     'zhou-01-ins_zhou-01_kojima-A-E': 0.636364},\n",
       "    'gpt-4': {'None_None_kojima-A-E': 0.727273,\n",
       "     'None_zhou-01_kojima-A-E': 0.757576,\n",
       "     'qa-10_None_kojima-A-E': 0.878788,\n",
       "     'qa-12_None_kojima-A-E': 0.69697,\n",
       "     'qa-13_None_kojima-A-E': 0.818182,\n",
       "     'qa-16_None_kojima-A-E': 0.666667,\n",
       "     'qa-17_None_kojima-A-E': 0.636364,\n",
       "     'zhou-01-ins_None_kojima-A-E': 0.757576},\n",
       "    'text-davinci-002': {'None_None_kojima-A-E': 0.757576,\n",
       "     'None_None_wei-01': 0.818182,\n",
       "     'None_kojima-01_kojima-A-E': 0.787879,\n",
       "     'None_zhou-01_kojima-A-E': 0.606061},\n",
       "    'text-davinci-003': {'None_None_kojima-A-E': 0.727273,\n",
       "     'None_kojima-01_kojima-A-E': 0.666667,\n",
       "     'None_zhou-01_kojima-A-E': 0.666667}}}},\n",
       " 'med_qa': {'test': {'accuracy': {'command-xlarge-nightly': {'None_None_kojima-A-E': 0.181818,\n",
       "     'None_kojima-01_kojima-A-E': 0.30303,\n",
       "     'None_zhou-01_kojima-A-E': 0.272727},\n",
       "    'flan-T5-xxl': {'None_None_kojima-A-E': 0.212121,\n",
       "     'None_kojima-01_kojima-A-E': 0.272727,\n",
       "     'None_zhou-01_kojima-A-E': 0.121212,\n",
       "     'qa-10_None_kojima-A-E': 0.242424,\n",
       "     'qa-12_None_kojima-A-E': 0.151515,\n",
       "     'qa-13_None_kojima-A-E': 0.242424,\n",
       "     'qa-16_None_kojima-A-E': 0.242424,\n",
       "     'qa-17_None_kojima-A-E': 0.242424,\n",
       "     'zhou-01-ins_None_kojima-A-E': 0.212121},\n",
       "    'gpt-3.5-turbo': {'None_None_kojima-A-E': 0.545455,\n",
       "     'None_kojima-01_kojima-A-E': 0.515152,\n",
       "     'None_kojima-03_kojima-A-E': 0.515152,\n",
       "     'None_kojima-09_kojima-A-E': 0.454545,\n",
       "     'None_zhou-01_kojima-A-E': 0.606061,\n",
       "     'qa-01_None_kojima-A-E': 0.515152,\n",
       "     'qa-05_None_kojima-A-E': 0.393939,\n",
       "     'qa-08_None_kojima-A-E': 0.363636,\n",
       "     'qa-09_None_kojima-A-E': 0.515152,\n",
       "     'qa-10_None_kojima-A-E': 0.424242,\n",
       "     'qa-12_None_kojima-A-E': 0.545455,\n",
       "     'qa-13_None_kojima-A-E': 0.424242,\n",
       "     'qa-16_None_kojima-A-E': 0.484848,\n",
       "     'qa-17_None_kojima-A-E': 0.515152,\n",
       "     'zhou-01-ins_None_kojima-A-E': 0.484848,\n",
       "     'zhou-01-ins_zhou-01_kojima-A-E': 0.454545},\n",
       "    'gpt-4': {'None_None_kojima-A-E': 0.636364,\n",
       "     'None_zhou-01_kojima-A-E': 0.727273,\n",
       "     'qa-10_None_kojima-A-E': 0.606061,\n",
       "     'qa-12_None_kojima-A-E': 0.666667,\n",
       "     'qa-13_None_kojima-A-E': 0.606061,\n",
       "     'qa-16_None_kojima-A-E': 0.636364,\n",
       "     'qa-17_None_kojima-A-E': 0.606061,\n",
       "     'zhou-01-ins_None_kojima-A-E': 0.606061},\n",
       "    'text-davinci-002': {'None_None_kojima-A-E': 0.333333,\n",
       "     'None_kojima-01_kojima-A-D': 0.242424,\n",
       "     'None_kojima-01_kojima-A-E': 0.121212,\n",
       "     'None_lievin-01_kojima-A-D': 0.242424,\n",
       "     'None_lievin-02_kojima-A-D': 0.333333,\n",
       "     'None_lievin-03_kojima-A-D': 0.454545,\n",
       "     'None_lievin-10_kojima-A-D': 0.333333,\n",
       "     'None_zhou-01_kojima-A-E': 0.242424},\n",
       "    'text-davinci-003': {'None_None_kojima-A-E': 0.30303,\n",
       "     'None_kojima-01_kojima-A-E': 0.333333,\n",
       "     'None_zhou-01_kojima-A-E': 0.393939}}}},\n",
       " 'medmc_qa': {'validation': {'accuracy': {'command-xlarge-nightly': {'None_None_kojima-A-D': 0.272727,\n",
       "     'None_kojima-01_kojima-A-D': 0.151515,\n",
       "     'None_zhou-01_kojima-A-D': 0.424242},\n",
       "    'flan-T5-xxl': {'None_None_kojima-A-D': 0.363636,\n",
       "     'None_kojima-01_kojima-A-D': 0.30303,\n",
       "     'None_zhou-01_kojima-A-D': 0.363636,\n",
       "     'qa-10_None_kojima-A-D': 0.333333,\n",
       "     'qa-12_None_kojima-A-D': 0.424242,\n",
       "     'qa-13_None_kojima-A-D': 0.363636,\n",
       "     'qa-16_None_kojima-A-D': 0.30303,\n",
       "     'qa-17_None_kojima-A-D': 0.393939,\n",
       "     'zhou-01-ins_None_kojima-A-D': 0.30303},\n",
       "    'gpt-3.5-turbo': {'None_None_kojima-A-D': 0.636364,\n",
       "     'None_kojima-01_kojima-A-D': 0.575758,\n",
       "     'None_kojima-03_kojima-A-D': 0.606061,\n",
       "     'None_kojima-09_kojima-A-D': 0.545455,\n",
       "     'None_zhou-01_kojima-A-D': 0.606061,\n",
       "     'qa-01_None_kojima-A-D': 0.606061,\n",
       "     'qa-05_None_kojima-A-D': 0.636364,\n",
       "     'qa-08_None_kojima-A-D': 0.484848,\n",
       "     'qa-09_None_kojima-A-D': 0.666667,\n",
       "     'qa-10_None_kojima-A-D': 0.636364,\n",
       "     'qa-12_None_kojima-A-D': 0.636364,\n",
       "     'qa-13_None_kojima-A-D': 0.515152,\n",
       "     'qa-16_None_kojima-A-D': 0.636364,\n",
       "     'qa-17_None_kojima-A-D': 0.666667,\n",
       "     'zhou-01-ins_None_kojima-A-D': 0.484848,\n",
       "     'zhou-01-ins_zhou-01_kojima-A-D': 0.515152},\n",
       "    'gpt-4': {'None_None_kojima-A-D': 0.787879,\n",
       "     'None_zhou-01_kojima-A-D': 0.848485,\n",
       "     'qa-10_None_kojima-A-D': 0.757576,\n",
       "     'qa-12_None_kojima-A-D': 0.757576,\n",
       "     'qa-13_None_kojima-A-D': 0.757576,\n",
       "     'qa-16_None_kojima-A-D': 0.848485,\n",
       "     'qa-17_None_kojima-A-D': 0.666667,\n",
       "     'zhou-01-ins_None_kojima-A-D': 0.818182},\n",
       "    'text-davinci-002': {'None_None_kojima-A-D': 0.30303,\n",
       "     'None_kojima-01_kojima-A-D': 0.363636,\n",
       "     'None_lievin-01_kojima-A-D': 0.393939,\n",
       "     'None_lievin-02_kojima-A-D': 0.333333,\n",
       "     'None_lievin-03_kojima-A-D': 0.272727,\n",
       "     'None_lievin-10_kojima-A-D': 0.484848,\n",
       "     'None_zhou-01_kojima-A-D': 0.454545},\n",
       "    'text-davinci-003': {'None_None_kojima-A-D': 0.363636,\n",
       "     'None_kojima-01_kojima-A-D': 0.363636,\n",
       "     'None_zhou-01_kojima-A-D': 0.484848}}}},\n",
       " 'open_book_qa': {'test': {'accuracy': {'command-xlarge-nightly': {'None_None_kojima-A-D': 0.575758,\n",
       "     'None_kojima-01_kojima-A-D': 0.424242,\n",
       "     'None_zhou-01_kojima-A-D': 0.606061},\n",
       "    'flan-T5-xxl': {'None_None_kojima-A-D': 0.757576,\n",
       "     'None_kojima-01_kojima-A-D': 0.818182,\n",
       "     'None_zhou-01_kojima-A-D': 0.818182,\n",
       "     'qa-10_None_kojima-A-D': 0.787879,\n",
       "     'qa-12_None_kojima-A-D': 0.787879,\n",
       "     'qa-13_None_kojima-A-D': 0.757576,\n",
       "     'qa-16_None_kojima-A-D': 0.666667,\n",
       "     'qa-17_None_kojima-A-D': 0.727273,\n",
       "     'zhou-01-ins_None_kojima-A-D': 0.757576},\n",
       "    'gpt-3.5-turbo': {'None_None_kojima-A-D': 0.848485,\n",
       "     'None_kojima-01_kojima-A-D': 0.787879,\n",
       "     'None_kojima-03_kojima-A-D': 0.818182,\n",
       "     'None_kojima-09_kojima-A-D': 0.757576,\n",
       "     'None_zhou-01_kojima-A-D': 0.727273,\n",
       "     'qa-01_None_kojima-A-D': 0.787879,\n",
       "     'qa-05_None_kojima-A-D': 0.606061,\n",
       "     'qa-08_None_kojima-A-D': 0.727273,\n",
       "     'qa-09_None_kojima-A-D': 0.636364,\n",
       "     'qa-10_None_kojima-A-D': 0.757576,\n",
       "     'qa-12_None_kojima-A-D': 0.848485,\n",
       "     'qa-13_None_kojima-A-D': 0.727273,\n",
       "     'qa-16_None_kojima-A-D': 0.666667,\n",
       "     'qa-17_None_kojima-A-D': 0.727273,\n",
       "     'zhou-01-ins_None_kojima-A-D': 0.727273,\n",
       "     'zhou-01-ins_zhou-01_kojima-A-D': 0.787879},\n",
       "    'gpt-4': {'None_None_kojima-A-D': 0.909091,\n",
       "     'None_zhou-01_kojima-A-D': 0.969697,\n",
       "     'qa-10_None_kojima-A-D': 0.939394,\n",
       "     'qa-12_None_kojima-A-D': 0.909091,\n",
       "     'qa-13_None_kojima-A-D': 0.878788,\n",
       "     'qa-16_None_kojima-A-D': 0.848485,\n",
       "     'qa-17_None_kojima-A-D': 0.909091,\n",
       "     'zhou-01-ins_None_kojima-A-D': 0.939394},\n",
       "    'text-davinci-002': {'None_None_kojima-A-D': 0.636364,\n",
       "     'None_kojima-01_kojima-A-D': 0.545455,\n",
       "     'None_zhou-01_kojima-A-D': 0.757576},\n",
       "    'text-davinci-003': {'None_None_kojima-A-D': 0.636364,\n",
       "     'None_kojima-01_kojima-A-D': 0.575758,\n",
       "     'None_zhou-01_kojima-A-D': 0.787879}}}},\n",
       " 'strategy_qa': {'train': {'accuracy': {'command-xlarge-nightly': {'None_None_kojima-yes-no': 0.545455,\n",
       "     'None_kojima-01_kojima-yes-no': 0.575758,\n",
       "     'None_zhou-01_kojima-yes-no': 0.242424},\n",
       "    'flan-T5-xxl': {'None_None_kojima-yes-no': 0.666667,\n",
       "     'None_kojima-01_kojima-yes-no': 0.69697,\n",
       "     'None_zhou-01_kojima-yes-no': 0.484848,\n",
       "     'qa-10_None_kojima-yes-no': 0.575758,\n",
       "     'qa-12_None_kojima-yes-no': 0.666667,\n",
       "     'qa-13_None_kojima-yes-no': 0.666667,\n",
       "     'qa-16_None_kojima-yes-no': 0.666667,\n",
       "     'qa-17_None_kojima-yes-no': 0.666667,\n",
       "     'zhou-01-ins_None_kojima-yes-no': 0.606061},\n",
       "    'gpt-3.5-turbo': {'None_None_kojima-yes-no': 0.545455,\n",
       "     'None_kojima-01_kojima-yes-no': 0.606061,\n",
       "     'None_kojima-03_kojima-yes-no': 0.545455,\n",
       "     'None_kojima-09_kojima-yes-no': 0.636364,\n",
       "     'None_zhou-01_kojima-yes-no': 0.606061,\n",
       "     'qa-01_None_kojima-yes-no': 0.545455,\n",
       "     'qa-05_None_kojima-yes-no': 0.454545,\n",
       "     'qa-08_None_kojima-yes-no': 0.636364,\n",
       "     'qa-09_None_kojima-yes-no': 0.69697,\n",
       "     'qa-10_None_kojima-yes-no': 0.606061,\n",
       "     'qa-12_None_kojima-yes-no': 0.515152,\n",
       "     'qa-13_None_kojima-yes-no': 0.69697,\n",
       "     'qa-16_None_kojima-yes-no': 0.636364,\n",
       "     'qa-17_None_kojima-yes-no': 0.666667,\n",
       "     'zhou-01-ins_None_kojima-yes-no': 0.575758,\n",
       "     'zhou-01-ins_zhou-01_kojima-yes-no': 0.575758},\n",
       "    'gpt-4': {'None_None_kojima-yes-no': 0.727273,\n",
       "     'None_zhou-01_kojima-yes-no': 0.909091,\n",
       "     'qa-10_None_kojima-yes-no': 0.848485,\n",
       "     'qa-12_None_kojima-yes-no': 0.757576,\n",
       "     'qa-13_None_kojima-yes-no': 0.848485,\n",
       "     'qa-16_None_kojima-yes-no': 0.848485,\n",
       "     'qa-17_None_kojima-yes-no': 0.878788,\n",
       "     'zhou-01-ins_None_kojima-yes-no': 0.787879},\n",
       "    'text-davinci-002': {'None_None_kojima-yes-no': 0.393939,\n",
       "     'None_None_wei-01': 0.636364,\n",
       "     'None_kojima-01_kojima-yes-no': 0.515152,\n",
       "     'None_zhou-01_kojima-yes-no': 0.69697},\n",
       "    'text-davinci-003': {'None_None_kojima-yes-no': 0.606061,\n",
       "     'None_kojima-01_kojima-yes-no': 0.636364,\n",
       "     'None_zhou-01_kojima-yes-no': 0.636364}}}},\n",
       " 'worldtree': {'test': {'accuracy': {'command-xlarge-nightly': {'None_None_kojima-A-D': 0.606061,\n",
       "     'None_kojima-01_kojima-A-D': 0.606061,\n",
       "     'None_zhou-01_kojima-A-D': 0.757576},\n",
       "    'flan-T5-xxl': {'None_None_kojima-A-D': 0.878788,\n",
       "     'None_kojima-01_kojima-A-D': 0.787879,\n",
       "     'None_zhou-01_kojima-A-D': 0.818182,\n",
       "     'qa-10_None_kojima-A-D': 0.818182,\n",
       "     'qa-12_None_kojima-A-D': 0.909091,\n",
       "     'qa-13_None_kojima-A-D': 0.787879,\n",
       "     'qa-16_None_kojima-A-D': 0.636364,\n",
       "     'qa-17_None_kojima-A-D': 0.787879,\n",
       "     'zhou-01-ins_None_kojima-A-D': 0.848485},\n",
       "    'gpt-3.5-turbo': {'None_None_kojima-A-D': 0.939394,\n",
       "     'None_kojima-01_kojima-A-D': 0.909091,\n",
       "     'None_kojima-03_kojima-A-D': 0.939394,\n",
       "     'None_kojima-09_kojima-A-D': 0.939394,\n",
       "     'None_zhou-01_kojima-A-D': 0.939394,\n",
       "     'qa-01_None_kojima-A-D': 0.878788,\n",
       "     'qa-05_None_kojima-A-D': 0.69697,\n",
       "     'qa-08_None_kojima-A-D': 0.969697,\n",
       "     'qa-09_None_kojima-A-D': 0.848485,\n",
       "     'qa-10_None_kojima-A-D': 0.878788,\n",
       "     'qa-12_None_kojima-A-D': 0.878788,\n",
       "     'qa-13_None_kojima-A-D': 0.939394,\n",
       "     'qa-16_None_kojima-A-D': 0.848485,\n",
       "     'qa-17_None_kojima-A-D': 0.909091,\n",
       "     'zhou-01-ins_None_kojima-A-D': 0.969697,\n",
       "     'zhou-01-ins_zhou-01_kojima-A-D': 0.939394},\n",
       "    'gpt-4': {'None_None_kojima-A-D': 0.969697,\n",
       "     'None_zhou-01_kojima-A-D': 0.969697,\n",
       "     'qa-10_None_kojima-A-D': 0.969697,\n",
       "     'qa-12_None_kojima-A-D': 1.0,\n",
       "     'qa-13_None_kojima-A-D': 1.0,\n",
       "     'qa-16_None_kojima-A-D': 0.939394,\n",
       "     'qa-17_None_kojima-A-D': 0.969697,\n",
       "     'zhou-01-ins_None_kojima-A-D': 0.969697},\n",
       "    'text-davinci-002': {'None_None_kojima-A-D': 0.878788,\n",
       "     'None_kojima-01_kojima-A-D': 0.787879,\n",
       "     'None_zhou-01_kojima-A-D': 0.909091},\n",
       "    'text-davinci-003': {'None_None_kojima-A-D': 0.939394,\n",
       "     'None_kojima-01_kojima-A-D': 0.909091,\n",
       "     'None_zhou-01_kojima-A-D': 0.818182}}}}}"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Evaluate the collection; the result is a nested dict of accuracies keyed by\n",
    "# dataset, split, metric, model and prompt-key combination.\n",
    "coll.evaluate()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def join_strings(list_of_strings):\n",
    "    \"\"\"Join the given items into a single hyphen-separated string.\n",
    "\n",
    "    Args:\n",
    "        list_of_strings: An iterable of items, a single string, or None.\n",
    "            A lone string is treated as a one-item list, and None is\n",
    "            treated as the one-item list [\"None\"].\n",
    "\n",
    "    Returns:\n",
    "        The str() of every item joined with \"-\"; an empty string when the\n",
    "        input is empty.\n",
    "    \"\"\"\n",
    "    if list_of_strings is None:\n",
    "        list_of_strings = [\"None\"]\n",
    "    elif isinstance(list_of_strings, str):\n",
    "        list_of_strings = [list_of_strings]\n",
    "    # str() lets non-string entries such as None appear in the result.\n",
    "    return \"-\".join(str(item) for item in list_of_strings)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ecfc69745a2447bf8f707670edcd0228",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4ba4d33c32a344b58a9fb4439dd0026e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2e11484c8a4c42f6bddf0950ce4454ac",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "344829a956874583b8596788920cb241",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f619790e67c749e9886962f3540b22f0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ab30e9b0bcbf4db28e9fcfdbc3ce7ac9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Write the collection to a JSON file named after the run:\n",
    "# dataset tag, API service, engine, instruction keys and CoT trigger keys, joined by \"_\".\n",
    "coll.dump(\n",
    "    \"_\".join(\n",
    "        [\n",
    "            \"thoughtsource_33\",\n",
    "            config[\"api_service\"],\n",
    "            # \"/\" is replaced, presumably so the engine name is not read as a directory path.\n",
    "            config[\"engine\"].replace(\"/\", \"_\"),\n",
    "            join_strings(config[\"instruction_keys\"]),\n",
    "            join_strings(config[\"cot_trigger_keys\"]),\n",
    "        ]\n",
    "    )\n",
    "    + \".json\"\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "a19cb823e24c98ee05a9cfa4a3a579b5d56d4c1a735f2a12456750b95a1e155e"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
